# Base libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# NetworkX
import networkx as nx
import osmnx as ox
# OS environment setup
from local_directories import *
# Seed value for reproducibility.
# NOTE(review): this only defines the value; no generator is seeded with it in this section.
random_seed = 2674
# Analysis parameters
# NOTE(review): presumably the minimum ego-graph size used when the basic stats
# were computed (see the NA-dropping step on the basic stats) — confirm.
neighbourhood_min_nodes = 8
# Radius passed to nx.generators.ego_graph, measured along the "length" edge attribute
max_distance = 500
# Read Leicester's street graph from GraphML and build its projected counterpart
leicester_graphml_path = bulk_storage_directory + "/osmnx/raw_excluded/leicester-1864.graphml"
leicester_osmnx_graph = ox.io.load_graphml(leicester_graphml_path)
leicester_osmnx_graph_prj = ox.project_graph(leicester_osmnx_graph)
# Number of nodes in the (unprojected) graph
leicester_osmnx_graph.number_of_nodes()
13293
# Draw the projected network: black nodes and thin black edges on a white background
network_plot_style = {
    "node_size": 5,
    "node_color": "#000000",
    "edge_color": "#000000",
    "edge_linewidth": 0.1,
    "bgcolor": "#ffffff",
    "figsize": (16, 16),
}
ox.plot_graph(leicester_osmnx_graph_prj, **network_plot_style)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
# Convert the projected graph's node attributes to a data frame (one row per node).
# The records are collected first and the frame is built once: concatenating a
# one-row frame per node is quadratic in the number of nodes and leaves every
# row with index 0, whereas this yields a unique 0..n-1 index.
node_records = []
for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True):
    # As before, the id is also stored on the graph node itself
    node_attributes["osmnx_node_id"] = int(node)
    node_records.append(node_attributes)
leicester_osmnx_graph_prj_df = pd.DataFrame(node_records)
leicester_osmnx_graph_prj_df.head()
| y | x | street_count | elevation | elevation_aster | elevation_srtm | lon | lat | osmnx_node_id | ref | highway | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.829804e+06 | 622151.977595 | 3 | 72.0 | 35 | 72 | -1.196195 | 52.604506 | 194739 | NaN | NaN |
| 0 | 5.829991e+06 | 622098.041002 | 3 | 72.0 | 45 | 72 | -1.196922 | 52.606196 | 1551014281 | NaN | NaN |
| 0 | 5.828827e+06 | 622259.813792 | 2 | 79.0 | 57 | 79 | -1.194965 | 52.595696 | 326312 | 21 | motorway_junction |
| 0 | 5.830107e+06 | 622077.742140 | 3 | 79.0 | 43 | 79 | -1.197179 | 52.607245 | 326320 | 21 | motorway_junction |
| 0 | 5.829673e+06 | 622220.645785 | 3 | 74.0 | 35 | 74 | -1.195230 | 52.603314 | 2627867454 | NaN | NaN |
# Load Leicester's base stats, keyed by "osmnx_node_id"
leicester_osmnx_basic_stats = (
    pd.read_csv(this_repo_directory + "/data/leicester-1864_basic_stats_dist500.csv")
    .rename(columns={"node_id": "osmnx_node_id"})
    # Rows without a node id come from ego-graphs with fewer than 8 nodes
    .dropna(subset=["osmnx_node_id"])
)
leicester_osmnx_basic_stats.head()
| 0 | n | m | k_avg | edge_length_total | edge_length_avg | streets_per_node_avg | streets_per_node_counts | streets_per_node_proportions | intersection_count | street_length_total | street_segment_count | street_length_avg | circuity_avg | self_loop_proportion | osmnx_node_id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7 | NaN | 11.0 | 11.0 | 2.0 | 1261.861 | 114.714636 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 11} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 11.0 | 1261.861 | 11.0 | 114.714636 | 1.038343 | 0.0 | 337976.0 |
| 8 | NaN | 13.0 | 13.0 | 2.0 | 2126.471 | 163.574692 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 13} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 13.0 | 2126.471 | 13.0 | 163.574692 | 1.030988 | 0.0 | 337979.0 |
| 9 | NaN | 14.0 | 14.0 | 2.0 | 1870.996 | 133.642571 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 14} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 14.0 | 1870.996 | 14.0 | 133.642571 | 1.048630 | 0.0 | 337983.0 |
| 10 | NaN | 14.0 | 14.0 | 2.0 | 1815.929 | 129.709214 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 14} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 14.0 | 1815.929 | 14.0 | 129.709214 | 1.050192 | 0.0 | 337985.0 |
| 11 | NaN | 14.0 | 14.0 | 2.0 | 1870.996 | 133.642571 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 14} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 14.0 | 1870.996 | 14.0 | 133.642571 | 1.048630 | 0.0 | 337986.0 |
# Load Leicester's node embeddings (columns: osmnx_node_id, EMB000, EMB001)
leicester_emb_path = this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5.csv"
leicester_emb_df = pd.read_csv(leicester_emb_path)
leicester_emb_df.head()
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 |
| 1 | 337979 | -0.322662 | -0.882213 |
| 2 | 337983 | -0.009132 | 0.948856 |
| 3 | 337985 | -0.136350 | 0.965531 |
| 4 | 337986 | -0.203456 | 0.447374 |
# Interactive scatter plot of the two embedding dimensions
# Both axes share the same light-grey grid and zero-line styling
light_grey_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig = px.scatter(
    leicester_emb_df,
    x="EMB000", y="EMB001",
    hover_data=['osmnx_node_id'],
    width=800, height=800,
)
fig.update_layout({"plot_bgcolor": "#ffffff"})
fig.update_xaxes(**light_grey_axis_style)
fig.update_yaxes(**light_grey_axis_style)
fig.show()
# Store each node's embedding on the projected graph as "EMB000"/"EMB001"
# attributes; nodes without an embedding row get None for both.
# A single lookup table replaces three full scans of the data frame per node
# and avoids calling float() on a one-element array.
embedding_by_node = {
    node_id: (float(emb000), float(emb001))
    for node_id, emb000, emb001 in zip(
        leicester_emb_df["osmnx_node_id"],
        leicester_emb_df["EMB000"],
        leicester_emb_df["EMB001"],
    )
}
for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True):
    node_attributes["EMB000"], node_attributes["EMB001"] = embedding_by_node.get(
        node, (None, None)
    )
# Map of the network with every node coloured by its EMB000 value
emb000_node_colours = [
    node_attributes["EMB000"]
    for _, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb000_node_colours,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
# Map of the network with every node coloured by its EMB001 value
emb001_node_colours = [
    node_attributes["EMB001"]
    for _, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb001_node_colours,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
Checking correlations between EMB000 and EMB001 and all the basic statistics from OSMnx. No clear correlation found.
# Closeness centrality of every node over the whole (unprojected) graph,
# as a data frame with the node id both as index and as a column
closeness_by_node = nx.closeness_centrality(leicester_osmnx_graph)
leicester_closeness_centrality = pd.DataFrame.from_dict(
    closeness_by_node, orient='index', columns=['closeness_networkwide']
)
leicester_closeness_centrality['osmnx_node_id'] = leicester_closeness_centrality.index
leicester_closeness_centrality.head()
| closeness_networkwide | osmnx_node_id | |
|---|---|---|
| 194739 | 0.000188 | 194739 |
| 326312 | 0.000000 | 326312 |
| 326313 | 0.015414 | 326313 |
| 326320 | 0.000150 | 326320 |
| 326321 | 0.000145 | 326321 |
# Betweenness centrality of every node over the whole (unprojected) graph,
# as a data frame with the node id both as index and as a column
betweenness_by_node = nx.betweenness_centrality(leicester_osmnx_graph)
leicester_betweenness_centrality = pd.DataFrame.from_dict(
    betweenness_by_node, orient='index', columns=['betweenness_networkwide']
)
leicester_betweenness_centrality['osmnx_node_id'] = leicester_betweenness_centrality.index
leicester_betweenness_centrality.head()
| betweenness_networkwide | osmnx_node_id | |
|---|---|---|
| 194739 | 1.132093e-07 | 194739 |
| 326312 | 0.000000e+00 | 326312 |
| 326313 | 0.000000e+00 | 326313 |
| 326320 | 2.983064e-04 | 326320 |
| 326321 | 2.830232e-08 | 326321 |
# One table per node: selected basic stats, both centralities and the embeddings.
# Every join is an inner merge on "osmnx_node_id".
pairplot_stat_columns = [
    "osmnx_node_id", "n", "m", "k_avg", "edge_length_total", "edge_length_avg",
    "streets_per_node_avg", "intersection_count", "street_length_total",
    "street_segment_count", "street_length_avg", "circuity_avg",
]
leicester_pairplot_df = leicester_osmnx_basic_stats[pairplot_stat_columns]
for frame_to_join in (
    leicester_closeness_centrality,
    leicester_betweenness_centrality,
    leicester_emb_df[["osmnx_node_id", "EMB000", "EMB001"]],
):
    leicester_pairplot_df = leicester_pairplot_df.merge(frame_to_join, on="osmnx_node_id")
# Add the pooled embeddings (renamed so they do not clash with EMB000/EMB001)
# and print Kendall's tau between all variables
pooled_emb_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_emb-pool_gnnuf_model_v0-5.csv"
).rename(columns={"EMB000":"EMB000pooled", "EMB001":"EMB001pooled"})
leicester_pairplot_df_withpooled = leicester_pairplot_df.merge(pooled_emb_df, on="osmnx_node_id")
kendall_correlations = leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="kendall")
print(kendall_correlations)
n m k_avg edge_length_total
n 1.000000 0.891733 0.013712 0.589867 \
m 0.891733 1.000000 0.132379 0.665304
k_avg 0.013712 0.132379 1.000000 0.305397
edge_length_total 0.589867 0.665304 0.305397 1.000000
edge_length_avg -0.296360 -0.234006 0.292995 0.104432
streets_per_node_avg 0.097850 0.117131 0.151434 0.321869
intersection_count 0.826980 0.810128 0.057609 0.674507
street_length_total 0.617244 0.656864 0.211502 0.879629
street_segment_count 0.902224 0.878092 0.040801 0.635415
street_length_avg -0.292572 -0.224905 0.323991 0.108293
circuity_avg -0.118638 -0.118920 -0.000375 -0.089911
closeness_networkwide 0.047026 0.058577 0.090216 0.243250
betweenness_networkwide 0.268942 0.266156 0.047262 0.369444
EMB000 -0.033401 0.013223 0.260863 0.210405
EMB001 -0.103946 -0.100975 0.005101 -0.130952
EMB000pooled -0.138311 -0.068330 0.376507 0.208221
EMB001pooled -0.226253 -0.212787 0.036726 -0.245809
edge_length_avg streets_per_node_avg
n -0.296360 0.097850 \
m -0.234006 0.117131
k_avg 0.292995 0.151434
edge_length_total 0.104432 0.321869
edge_length_avg 1.000000 0.340156
streets_per_node_avg 0.340156 1.000000
intersection_count -0.166574 0.279544
street_length_total 0.080524 0.389084
street_segment_count -0.232123 0.197164
street_length_avg 0.904994 0.316873
circuity_avg 0.048389 -0.153925
closeness_networkwide 0.308200 0.448797
betweenness_networkwide 0.140399 0.226000
EMB000 0.370231 0.280222
EMB001 -0.044676 -0.231995
EMB000pooled 0.579544 0.430593
EMB001pooled -0.021938 -0.420593
intersection_count street_length_total
n 0.826980 0.617244 \
m 0.810128 0.656864
k_avg 0.057609 0.211502
edge_length_total 0.674507 0.879629
edge_length_avg -0.166574 0.080524
streets_per_node_avg 0.279544 0.389084
intersection_count 1.000000 0.739393
street_length_total 0.739393 1.000000
street_segment_count 0.910502 0.687454
street_length_avg -0.170992 0.084041
circuity_avg -0.143807 -0.106058
closeness_networkwide 0.157045 0.284100
betweenness_networkwide 0.319792 0.390078
EMB000 0.047278 0.191975
EMB001 -0.143725 -0.163191
EMB000pooled -0.019241 0.190095
EMB001pooled -0.302492 -0.315328
street_segment_count street_length_avg
n 0.902224 -0.292572 \
m 0.878092 -0.224905
k_avg 0.040801 0.323991
edge_length_total 0.635415 0.108293
edge_length_avg -0.232123 0.904994
streets_per_node_avg 0.197164 0.316873
intersection_count 0.910502 -0.170992
street_length_total 0.687454 0.084041
street_segment_count 1.000000 -0.233779
street_length_avg -0.233779 1.000000
circuity_avg -0.132164 0.043635
closeness_networkwide 0.107291 0.301521
betweenness_networkwide 0.295352 0.126387
EMB000 0.008809 0.365477
EMB001 -0.133773 -0.044273
EMB000pooled -0.069979 0.588756
EMB001pooled -0.284915 -0.014573
circuity_avg closeness_networkwide
n -0.118638 0.047026 \
m -0.118920 0.058577
k_avg -0.000375 0.090216
edge_length_total -0.089911 0.243250
edge_length_avg 0.048389 0.308200
streets_per_node_avg -0.153925 0.448797
intersection_count -0.143807 0.157045
street_length_total -0.106058 0.284100
street_segment_count -0.132164 0.107291
street_length_avg 0.043635 0.301521
circuity_avg 1.000000 -0.116848
closeness_networkwide -0.116848 1.000000
betweenness_networkwide -0.028838 0.245383
EMB000 -0.028421 0.262182
EMB001 0.131059 -0.194019
EMB000pooled -0.065721 0.364707
EMB001pooled 0.225035 -0.337499
betweenness_networkwide EMB000 EMB001
n 0.268942 -0.033401 -0.103946 \
m 0.266156 0.013223 -0.100975
k_avg 0.047262 0.260863 0.005101
edge_length_total 0.369444 0.210405 -0.130952
edge_length_avg 0.140399 0.370231 -0.044676
streets_per_node_avg 0.226000 0.280222 -0.231995
intersection_count 0.319792 0.047278 -0.143725
street_length_total 0.390078 0.191975 -0.163191
street_segment_count 0.295352 0.008809 -0.133773
street_length_avg 0.126387 0.365477 -0.044273
circuity_avg -0.028838 -0.028421 0.131059
closeness_networkwide 0.245383 0.262182 -0.194019
betweenness_networkwide 1.000000 0.241848 -0.025826
EMB000 0.241848 1.000000 -0.050229
EMB001 -0.025826 -0.050229 1.000000
EMB000pooled 0.117201 0.414767 -0.104923
EMB001pooled -0.155374 -0.117148 0.393005
EMB000pooled EMB001pooled
n -0.138311 -0.226253
m -0.068330 -0.212787
k_avg 0.376507 0.036726
edge_length_total 0.208221 -0.245809
edge_length_avg 0.579544 -0.021938
streets_per_node_avg 0.430593 -0.420593
intersection_count -0.019241 -0.302492
street_length_total 0.190095 -0.315328
street_segment_count -0.069979 -0.284915
street_length_avg 0.588756 -0.014573
circuity_avg -0.065721 0.225035
closeness_networkwide 0.364707 -0.337499
betweenness_networkwide 0.117201 -0.155374
EMB000 0.414767 -0.117148
EMB001 -0.104923 0.393005
EMB000pooled 1.000000 -0.170566
EMB001pooled -0.170566 1.000000
# Cross-check the Kendall results with Spearman's rho
spearman_correlations = leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"]).corr(method="spearman")
print(spearman_correlations)
n m k_avg edge_length_total
n 1.000000 0.980338 0.015981 0.784142 \
m 0.980338 1.000000 0.188244 0.851935
k_avg 0.015981 0.188244 1.000000 0.432809
edge_length_total 0.784142 0.851935 0.432809 1.000000
edge_length_avg -0.425638 -0.341298 0.432676 0.160137
streets_per_node_avg 0.144189 0.172351 0.206537 0.464561
intersection_count 0.953861 0.944876 0.071353 0.857755
street_length_total 0.806319 0.842110 0.292745 0.972294
street_segment_count 0.983940 0.971088 0.050415 0.827154
street_length_avg -0.420878 -0.328617 0.474856 0.165242
circuity_avg -0.173852 -0.173620 -0.001432 -0.132332
closeness_networkwide 0.069174 0.087432 0.129970 0.360226
betweenness_networkwide 0.382596 0.380400 0.066577 0.529267
EMB000 -0.049975 0.021164 0.388864 0.320914
EMB001 -0.157148 -0.152845 0.003666 -0.202643
EMB000pooled -0.204113 -0.101058 0.545514 0.310028
EMB001pooled -0.329446 -0.310380 0.045068 -0.358725
edge_length_avg streets_per_node_avg
n -0.425638 0.144189 \
m -0.341298 0.172351
k_avg 0.432676 0.206537
edge_length_total 0.160137 0.464561
edge_length_avg 1.000000 0.499462
streets_per_node_avg 0.499462 1.000000
intersection_count -0.240841 0.406843
street_length_total 0.128629 0.558292
street_segment_count -0.338067 0.291215
street_length_avg 0.983105 0.469362
circuity_avg 0.067899 -0.230816
closeness_networkwide 0.456250 0.641996
betweenness_networkwide 0.203815 0.326100
EMB000 0.534784 0.414339
EMB001 -0.066179 -0.348581
EMB000pooled 0.775982 0.610777
EMB001pooled -0.038231 -0.605623
intersection_count street_length_total
n 0.953861 0.806319 \
m 0.944876 0.842110
k_avg 0.071353 0.292745
edge_length_total 0.857755 0.972294
edge_length_avg -0.240841 0.128629
streets_per_node_avg 0.406843 0.558292
intersection_count 1.000000 0.904625
street_length_total 0.904625 1.000000
street_segment_count 0.985890 0.867512
street_length_avg -0.248148 0.133033
circuity_avg -0.209816 -0.156784
closeness_networkwide 0.233107 0.418098
betweenness_networkwide 0.454251 0.557107
EMB000 0.072714 0.294263
EMB001 -0.218416 -0.253735
EMB000pooled -0.026976 0.286724
EMB001pooled -0.434794 -0.456843
street_segment_count street_length_avg
n 0.983940 -0.420878 \
m 0.971088 -0.328617
k_avg 0.050415 0.474856
edge_length_total 0.827154 0.165242
edge_length_avg -0.338067 0.983105
streets_per_node_avg 0.291215 0.469362
intersection_count 0.985890 -0.248148
street_length_total 0.867512 0.133033
street_segment_count 1.000000 -0.340965
street_length_avg -0.340965 1.000000
circuity_avg -0.192780 0.060435
closeness_networkwide 0.159911 0.448151
betweenness_networkwide 0.420247 0.183958
EMB000 0.014611 0.528989
EMB001 -0.203271 -0.065379
EMB000pooled -0.103784 0.785537
EMB001pooled -0.410763 -0.026850
circuity_avg closeness_networkwide
n -0.173852 0.069174 \
m -0.173620 0.087432
k_avg -0.001432 0.129970
edge_length_total -0.132332 0.360226
edge_length_avg 0.067899 0.456250
streets_per_node_avg -0.230816 0.641996
intersection_count -0.209816 0.233107
street_length_total -0.156784 0.418098
street_segment_count -0.192780 0.159911
street_length_avg 0.060435 0.448151
circuity_avg 1.000000 -0.176367
closeness_networkwide -0.176367 1.000000
betweenness_networkwide -0.041296 0.350389
EMB000 -0.043169 0.388640
EMB001 0.195740 -0.292064
EMB000pooled -0.100317 0.537505
EMB001pooled 0.327021 -0.505571
betweenness_networkwide EMB000 EMB001
n 0.382596 -0.049975 -0.157148 \
m 0.380400 0.021164 -0.152845
k_avg 0.066577 0.388864 0.003666
edge_length_total 0.529267 0.320914 -0.202643
edge_length_avg 0.203815 0.534784 -0.066179
streets_per_node_avg 0.326100 0.414339 -0.348581
intersection_count 0.454251 0.072714 -0.218416
street_length_total 0.557107 0.294263 -0.253735
street_segment_count 0.420247 0.014611 -0.203271
street_length_avg 0.183958 0.528989 -0.065379
circuity_avg -0.041296 -0.043169 0.195740
closeness_networkwide 0.350389 0.388640 -0.292064
betweenness_networkwide 1.000000 0.354062 -0.053996
EMB000 0.354062 1.000000 -0.077134
EMB001 -0.053996 -0.077134 1.000000
EMB000pooled 0.170386 0.598860 -0.161475
EMB001pooled -0.225828 -0.184985 0.569221
EMB000pooled EMB001pooled
n -0.204113 -0.329446
m -0.101058 -0.310380
k_avg 0.545514 0.045068
edge_length_total 0.310028 -0.358725
edge_length_avg 0.775982 -0.038231
streets_per_node_avg 0.610777 -0.605623
intersection_count -0.026976 -0.434794
street_length_total 0.286724 -0.456843
street_segment_count -0.103784 -0.410763
street_length_avg 0.785537 -0.026850
circuity_avg -0.100317 0.327021
closeness_networkwide 0.537505 -0.505571
betweenness_networkwide 0.170386 -0.225828
EMB000 0.598860 -0.184985
EMB001 -0.161475 0.569221
EMB000pooled 1.000000 -0.270270
EMB001pooled -0.270270 1.000000
# Pairwise histograms of every variable except the node id
pairplot_variables = leicester_pairplot_df_withpooled.drop(columns=["osmnx_node_id"])
sns.pairplot(pairplot_variables, kind="hist")
<seaborn.axisgrid.PairGrid at 0x1aa2e57e0>
# Save the merged per-node table (stats, centralities, embeddings) without the index
merged_stats_path = this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5_with-node-and-basic-stats.csv"
leicester_pairplot_df_withpooled.to_csv(merged_stats_path, index=False)
Checking the ego-networks of the nodes with the most extreme values in each direction (min and max) for both embedding dimensions (EMB000 and EMB001).
# Embedding row of node 6782625866
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"] == 6782625866]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 12711 | 6782625866 | -0.999753 | -0.943405 |
# Ego-network of node 6782625866: everything within max_distance along edge
# "length", ignoring edge direction; the centre node is drawn larger and in red
ego_6782625866 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    6782625866,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_6782625866_prj = ox.project_graph(ego_6782625866)
ego_6782625866_is_centre = [node == 6782625866 for node in ego_6782625866_prj.nodes]
ox.plot_graph(
    ego_6782625866_prj,
    node_size=[20 if is_centre else 5 for is_centre in ego_6782625866_is_centre],
    node_color=["#e41a1c" if is_centre else "#ffffff" for is_centre in ego_6782625866_is_centre],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row of node 354554417
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"] == 354554417]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 4733 | 354554417 | -0.966505 | 0.982919 |
# Ego-network of node 354554417: everything within max_distance along edge
# "length", ignoring edge direction; the centre node is drawn larger and in red
ego_354554417 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    354554417,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_354554417_prj = ox.project_graph(ego_354554417)
ego_354554417_is_centre = [node == 354554417 for node in ego_354554417_prj.nodes]
ox.plot_graph(
    ego_354554417_prj,
    node_size=[20 if is_centre else 5 for is_centre in ego_354554417_is_centre],
    node_color=["#e41a1c" if is_centre else "#ffffff" for is_centre in ego_354554417_is_centre],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row of node 1179199412
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"] == 1179199412]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 8021 | 1179199412 | 0.999113 | 0.99909 |
# Ego-network of node 1179199412: everything within max_distance along edge
# "length", ignoring edge direction; the centre node is drawn larger and in red
ego_1179199412 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    1179199412,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_1179199412_prj = ox.project_graph(ego_1179199412)
ego_1179199412_is_centre = [node == 1179199412 for node in ego_1179199412_prj.nodes]
ox.plot_graph(
    ego_1179199412_prj,
    node_size=[20 if is_centre else 5 for is_centre in ego_1179199412_is_centre],
    node_color=["#e41a1c" if is_centre else "#ffffff" for is_centre in ego_1179199412_is_centre],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row of node 2858142815
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"] == 2858142815]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 11121 | 2858142815 | 0.002341 | 0.165836 |
# Ego-network of node 2858142815: everything within max_distance along edge
# "length", ignoring edge direction; the centre node is drawn larger and in red
ego_2858142815 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    2858142815,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_2858142815_prj = ox.project_graph(ego_2858142815)
ego_2858142815_is_centre = [node == 2858142815 for node in ego_2858142815_prj.nodes]
ox.plot_graph(
    ego_2858142815_prj,
    node_size=[20 if is_centre else 5 for is_centre in ego_2858142815_is_centre],
    node_color=["#e41a1c" if is_centre else "#ffffff" for is_centre in ego_2858142815_is_centre],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Embedding row of node 296162322
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"] == 296162322]
| osmnx_node_id | EMB000 | EMB001 | |
|---|---|---|---|
| 3845 | 296162322 | 0.99999 | -0.999946 |
# Ego-network of node 296162322: everything within max_distance along edge
# "length", ignoring edge direction; the centre node is drawn larger and in red
ego_296162322 = nx.generators.ego_graph(
    leicester_osmnx_graph,
    296162322,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_296162322_prj = ox.project_graph(ego_296162322)
ego_296162322_is_centre = [node == 296162322 for node in ego_296162322_prj.nodes]
ox.plot_graph(
    ego_296162322_prj,
    node_size=[20 if is_centre else 5 for is_centre in ego_296162322_is_centre],
    node_color=["#e41a1c" if is_centre else "#ffffff" for is_centre in ego_296162322_is_centre],
    figsize=(5, 5),
)
(<Figure size 500x500 with 1 Axes>, <Axes: >)
# Cluster the nodes in embedding space with DBSCAN and store the label in "clust"
leicester_emb_patters_df = leicester_emb_df.copy()
from sklearn.cluster import DBSCAN
# DBSCAN cannot be fitted on missing values, so only complete rows are clustered
leicester_emb_df_clust = leicester_emb_df[["EMB000", "EMB001"]].dropna()
clust = DBSCAN(eps=0.11, min_samples=300)
# Align the labels with the full table by index rather than by position: a
# positional assignment fails with a length mismatch as soon as dropna()
# removes a row. Rows that could not be clustered get -1, the same label
# DBSCAN gives to noise points.
cluster_labels = pd.Series(
    clust.fit_predict(leicester_emb_df_clust),
    index=leicester_emb_df_clust.index,
)
leicester_emb_patters_df["clust"] = cluster_labels.reindex(
    leicester_emb_patters_df.index, fill_value=-1
)
# Number of distinct labels (the noise label counts as one)
leicester_emb_patters_df["clust"].nunique()
8
# Nine-colour palette used for the clusters. Cluster labels index it directly,
# so the noise label -1 picks the last entry (grey) through negative indexing.
colorbrewer_set1 = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628", "#f781bf", "#999999"]
leicester_emb_patters_df["clust_colour"] = leicester_emb_patters_df["clust"].map(
    lambda cluster_label: colorbrewer_set1[cluster_label]
)
leicester_emb_patters_df.head()
| osmnx_node_id | EMB000 | EMB001 | clust | clust_colour | |
|---|---|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 | -1 | #999999 |
| 1 | 337979 | -0.322662 | -0.882213 | -1 | #999999 |
| 2 | 337983 | -0.009132 | 0.948856 | -1 | #999999 |
| 3 | 337985 | -0.136350 | 0.965531 | -1 | #999999 |
| 4 | 337986 | -0.203456 | 0.447374 | -1 | #999999 |
# Static scatter plot of the embeddings, coloured by DBSCAN cluster
plt.figure(figsize=(7, 7))
ax = plt.axes()
ax.set_facecolor("white")
ax.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["clust_colour"],
    s=5,
    edgecolors='black',
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()
# Interactive scatter plot of the embeddings, coloured by DBSCAN cluster
# Both axes share the same light-grey grid and zero-line styling
cluster_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
cluster_markers = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker={"color": leicester_emb_patters_df["clust_colour"]},
)
fig = go.Figure()
fig.add_trace(cluster_markers)
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(**cluster_axis_style)
fig.update_yaxes(**cluster_axis_style)
fig.show()
def bivariate_colour(x, limits):
    """Return the 3x3 bivariate-palette colour for a pair of values.

    Each value is put into one of three classes by its own pair of break
    points (value <= lower, value <= upper, otherwise above), and the two
    classes select one cell of the palette.

    Args:
        x: Two-element sequence ``(first_value, second_value)``.
        limits: 2x2 array-like of break points; ``limits[0]`` holds the
            ``(lower, upper)`` breaks for ``x[0]`` and ``limits[1]`` those
            for ``x[1]``. NumPy arrays and nested sequences both work.

    Returns:
        The hex colour string of the matching palette cell, or ``None`` when
        either value is missing (``None`` or NaN).
    """
    # Rows follow the class of x[0], columns the class of x[1]
    palette = (
        ("#e8e8e8", "#e4acac", "#c85a5a"),
        ("#b0d5df", "#ad9ea5", "#985356"),
        ("#64acbe", "#627f8c", "#574249"),
    )

    def _class_index(value, breaks):
        # 0: at or below the lower break, 1: at or below the upper break, 2: above
        if value <= breaks[0]:
            return 0
        if value <= breaks[1]:
            return 1
        return 2

    first_value, second_value = x[0], x[1]
    # NaN fails every "<=" comparison, so without this check a missing value
    # read from a data frame would silently land in the highest class.
    if pd.isna(first_value) or pd.isna(second_value):
        return None
    return palette[_class_index(first_value, limits[0])][_class_index(second_value, limits[1])]
# Tercile break points per embedding dimension: row 0 for EMB000, row 1 for EMB001
leicester_emb_quantiles = (
    leicester_emb_df[["EMB000", "EMB001"]].quantile([1/3, 2/3]).to_numpy().T
)


def _row_bivariate_colour(row):
    # Colour of one node from its two embedding values
    return bivariate_colour([row["EMB000"], row["EMB001"]], leicester_emb_quantiles)


leicester_emb_patters_df["bivariate_colour"] = leicester_emb_patters_df.apply(
    _row_bivariate_colour, axis=1
)
leicester_emb_patters_df.head()
| osmnx_node_id | EMB000 | EMB001 | clust | clust_colour | bivariate_colour | |
|---|---|---|---|---|---|---|
| 0 | 337976 | -0.212304 | -0.563564 | -1 | #999999 | #e8e8e8 |
| 1 | 337979 | -0.322662 | -0.882213 | -1 | #999999 | #e8e8e8 |
| 2 | 337983 | -0.009132 | 0.948856 | -1 | #999999 | #985356 |
| 3 | 337985 | -0.136350 | 0.965531 | -1 | #999999 | #c85a5a |
| 4 | 337986 | -0.203456 | 0.447374 | -1 | #999999 | #e4acac |
# Static scatter plot of the embeddings, coloured by bivariate class
plt.figure(figsize=(7, 7))
ax = plt.axes()
ax.set_facecolor("white")
ax.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["bivariate_colour"],
    s=10,
    edgecolors='black',
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()
# Interactive scatter plot of the embeddings, coloured by bivariate class
# Both axes share the same light-grey grid and zero-line styling
bivariate_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
bivariate_markers = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker={"color": leicester_emb_patters_df["bivariate_colour"]},
)
fig = go.Figure()
fig.add_trace(bivariate_markers)
fig.update_layout({"plot_bgcolor": "#ffffff"}, width=800, height=800)
fig.update_xaxes(**bivariate_axis_style)
fig.update_yaxes(**bivariate_axis_style)
fig.show()
# Copy of the projected graph whose nodes carry their plotting attributes:
# "bivariate_colour", "clust_colour" and "node_size". Nodes without a row in
# the embeddings table are drawn black and small.
leicester_osmnx_bivariate = leicester_osmnx_graph_prj.copy()
# One lookup table instead of a full data-frame scan per node. Only the first
# row per node id is kept, matching the previous ".values[0]" selection.
colours_by_node = (
    leicester_emb_patters_df
    .drop_duplicates(subset="osmnx_node_id")
    .set_index("osmnx_node_id")[["bivariate_colour", "clust_colour"]]
    .to_dict(orient="index")
)
for node, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    node_colours = colours_by_node.get(node)
    if node_colours is None:
        node_attributes["bivariate_colour"] = "#000000"
        node_attributes["clust_colour"] = "#000000"
        node_attributes["node_size"] = 1
    else:
        node_attributes["bivariate_colour"] = node_colours["bivariate_colour"]
        node_attributes["clust_colour"] = node_colours["clust_colour"]
        node_attributes["node_size"] = 7
# Map of the network coloured by cluster; nodes carrying the last palette
# colour (used for noise) are shrunk to size 1
noise_colour = colorbrewer_set1[-1]
cluster_map_colours = [
    node_attributes["clust_colour"]
    for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True)
]
cluster_map_sizes = [
    1 if node_attributes["clust_colour"] == noise_colour else node_attributes["node_size"]
    for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=cluster_map_colours,
    node_size=cluster_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
# ox.plot_graph(
# leicester_osmnx_bivariate,
# node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
# node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*8 if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 8 for node in leicester_osmnx_bivariate.nodes],
# bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
# figsize=(48, 48))
# Map of the network coloured by bivariate class, using the per-node sizes
bivariate_map_colours = [
    node_attributes["bivariate_colour"]
    for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True)
]
bivariate_map_sizes = [
    node_attributes["node_size"]
    for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=bivariate_map_colours,
    node_size=bivariate_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
# ox.plot_graph(
# leicester_osmnx_bivariate,
# node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
# node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*2 for node in leicester_osmnx_bivariate.nodes],
# bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
# figsize=(24, 24))
import geopandas as gpd
# Geo data frame of all graph nodes (points built from lon/lat, EPSG:4326),
# left-joined with the embedding patterns and then with the basic stats so
# that nodes missing from either table are kept
node_points = gpd.points_from_xy(
    leicester_osmnx_graph_prj_df.lon,
    leicester_osmnx_graph_prj_df.lat,
)
leicester_gdf = gpd.GeoDataFrame(
    leicester_osmnx_graph_prj_df,
    geometry=node_points,
    crs="EPSG:4326",
)
leicester_gdf = leicester_gdf.merge(leicester_emb_patters_df, on='osmnx_node_id', how='left')
leicester_gdf = leicester_gdf.merge(leicester_osmnx_basic_stats, on='osmnx_node_id', how='left')
leicester_gdf.head()
| y | x | street_count | elevation | elevation_aster | elevation_srtm | lon | lat | osmnx_node_id | ref | ... | edge_length_avg | streets_per_node_avg | streets_per_node_counts | streets_per_node_proportions | intersection_count | street_length_total | street_segment_count | street_length_avg | circuity_avg | self_loop_proportion | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5.829804e+06 | 622151.977595 | 3 | 72.0 | 35 | 72 | -1.196195 | 52.604506 | 194739 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 5.829991e+06 | 622098.041002 | 3 | 72.0 | 45 | 72 | -1.196922 | 52.606196 | 1551014281 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 5.828827e+06 | 622259.813792 | 2 | 79.0 | 57 | 79 | -1.194965 | 52.595696 | 326312 | 21 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 5.830107e+06 | 622077.742140 | 3 | 79.0 | 43 | 79 | -1.197179 | 52.607245 | 326320 | 21 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 5.829673e+06 | 622220.645785 | 3 | 74.0 | 35 | 74 | -1.195230 | 52.603314 | 2627867454 | NaN | ... | 160.60625 | 3.0 | {0: 0, 1: 0, 2: 0, 3: 11} | {0: 0.0, 1: 0.0, 2: 0.0, 3: 1.0} | 11.0 | 1927.275 | 12.0 | 160.60625 | 1.021342 | 0.0 |
5 rows × 32 columns
# Interactive map of the clustered nodes: rows carrying the last palette colour
# (used for noise) and rows without an EMB000 value are left out
is_clustered = leicester_gdf["clust_colour"] != colorbrewer_set1[-1]
clustered_nodes_gdf = leicester_gdf[is_clustered].dropna(subset=["EMB000"])
# NOTE(review): confirm that the "Stamen Toner" tile name still resolves with
# the installed folium/xyzservices versions.
clustered_nodes_gdf.explore(
    color="clust_colour",
    marker_kwds={"radius": 7},
    style_kwds={"stroke": False},
    tiles="Stamen Toner",
)